################################################################################
## LOAD PACKAGES
################################################################################

library(readxl)
library(dplyr)
library(igraph)
library(ggraph)
library(ggplot2)
library(ggrepel)
library(patchwork)
library(tibble)
library(purrr)

################################################################################
## INTER-FACTOR CORRELATIONS (IRT)
################################################################################

# Load data

# One correlation table per VAA / language version
general.nl.irt.inter <- read_excel("general_nl_irt_inter_correlations.xlsx")
general.fr.irt.inter <- read_excel("general_fr_irt_inter_correlations.xlsx")
youth.nl.irt.inter <- read_excel("youth_nl_irt_inter_correlations.xlsx")
youth.fr.irt.inter <- read_excel("youth_fr_irt_inter_correlations.xlsx")
brussels.nl.irt.inter <- read_excel("brussels_nl_irt_inter_correlations.xlsx")
brussels.fr.irt.inter <- read_excel("brussels_fr_irt_inter_correlations.xlsx")
flanders.nl.irt.inter <- read_excel("flanders_nl_irt_inter_correlations.xlsx")
wallonia.fr.irt.inter <- read_excel("wallonia_fr_irt_inter_correlations.xlsx")
federal.nl.irt.inter <- read_excel("federal_nl_irt_inter_correlations.xlsx")
federal.fr.irt.inter <- read_excel("federal_fr_irt_inter_correlations.xlsx")
eu.nl.irt.inter <- read_excel("eu_nl_irt_inter_correlations.xlsx")
eu.fr.irt.inter <- read_excel("eu_fr_irt_inter_correlations.xlsx")

# Stack all twelve tables. federal.fr.irt.inter was previously read but left
# out of this call, so "Federal – French" never reached the later filters.
irt.inter <- rbind(general.nl.irt.inter, general.fr.irt.inter,
                   youth.nl.irt.inter, youth.fr.irt.inter,
                   brussels.nl.irt.inter, brussels.fr.irt.inter,
                   flanders.nl.irt.inter, wallonia.fr.irt.inter,
                   federal.nl.irt.inter, federal.fr.irt.inter,
                   eu.nl.irt.inter, eu.fr.irt.inter)

# Duplicate every row with Factor1 and Factor2 swapped, so each pair appears
# in both directions. The mirrored copy is built from irt.inter itself (the
# previous code referenced `cor_data`, which is not defined in this script).
irt.inter.df <- irt.inter %>%
  bind_rows(
    irt.inter %>%
      # Temporary names avoid overwriting Factor1 before it is copied
      rename(Factor1_tmp = Factor1, Factor2_tmp = Factor2) %>%
      transmute(vaa,
                Factor1 = Factor2_tmp,
                Factor2 = Factor1_tmp,
                Correlation)
  ) %>%
  arrange(vaa, Factor1, Factor2)

# Subset of VAAs that is later processed as the 3-factor group
irt.inter3.df <- filter(
  irt.inter.df,
  vaa %in% c("Youth – Dutch", "Youth – French",
             "Brussels – Dutch", "Flanders",
             "Wallonia", "EU – Dutch", "EU – French")
)

# Subset of VAAs that is later processed as the 4-factor group
irt.inter4.df <- filter(
  irt.inter.df,
  vaa %in% c("General – Dutch", "General – French",
             "Brussels – French", "Federal – Dutch",
             "Federal – French")
)

#irt.inter4.df$Correlation <- abs(irt.inter4.df$Correlation)

# One data frame per VAA
irt.inter4.df.list <- split(irt.inter4.df, irt.inter4.df[["vaa"]])

# Convert vctrs_list_of to regular list (if needed)
irt.list <- as.list(irt.inter4.df.list)

# Build one symmetric correlation matrix per VAA in irt.list.
# Each element of irt.list is a long table with Factor1, Factor2, Correlation.
irt.matrices <- lapply(irt.list, function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()

  # Index by label: a factor used as a matrix subscript would be treated as
  # its integer codes rather than its names
  from <- as.character(df$Factor1)
  to <- as.character(df$Factor2)

  # Full, sorted set of factor names used in this VAA
  factors_all <- sort(unique(c(from, to)))

  # Empty matrix; pairs that never occur stay NA
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))

  # Fill both triangles; seq_len() makes a zero-row table a no-op
  # (1:nrow(df) would iterate over c(1, 0) and fail)
  for (i in seq_len(nrow(df))) {
    val <- df$Correlation[i]
    mat[from[i], to[i]] <- val
    mat[to[i], from[i]] <- val  # Ensure symmetry
  }

  # A factor correlates perfectly with itself
  diag(mat) <- 1

  mat
})

# --------------------------
# Dataset names and metadata
# --------------------------

# Names of the VAAs in the 4-factor group
datasets <- c(
  "Brussels – French",
  "Federal – Dutch",
  "Federal – French",
  "General – Dutch",
  "General – French"
)

# ------------------------------
# 2.1. Convert Correlation Matrices to DataFrame
# ------------------------------

# Reshape a correlation matrix into a long data frame.
#
# Args:
#   cor_mat:      matrix with row and column names.
#   dataset_name: value stored in the Dataset column of every row.
#
# Returns: data frame with columns Factor1, Factor2, Correlation and Dataset,
#   one row per ordered pair of distinct factors (diagonal cells are dropped).
cor_matrix_to_df <- function(cor_mat, dataset_name) {
  long_df <- as.data.frame(as.table(cor_mat))
  long_df$Dataset <- dataset_name
  names(long_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")

  # Keep only off-diagonal cells
  off_diagonal <- long_df$Factor1 != long_df$Factor2
  long_df[off_diagonal, ]
}

# Long-format table for every matrix, tagged with the matrix's name.
# unname() keeps the list unnamed, as a plain lapply() over names() would.
cor_df_list <- unname(
  Map(cor_matrix_to_df, irt.matrices, names(irt.matrices))
)

# Stack the per-dataset tables into a single data frame
cor_df <- do.call("rbind", cor_df_list)

# -----------------------
# 4. Plot: Correlation Networks
# -----------------------

# Draw the correlation network of one dataset.
#
# Args:
#   cor_df:       long data frame with columns Factor1, Factor2, Correlation
#                 (may contain both directions of every pair).
#   dataset_name: string used as the plot title.
#   color:        colour of the factor nodes.
#
# Returns: a ggraph plot with one node per factor and one labelled edge per
#   factor pair.
make_graph_plot_from_df <- function(cor_df, dataset_name, color) {
  # Upper limit of the edge-width scale. Widths are clamped to it below:
  # values outside a continuous scale's limits are censored to NA, which
  # would leave the strongest correlations without a visible edge.
  width_cap <- 0.6

  # Remove duplicated edges like F1-F2 / F2-F1: build an order-independent
  # key per pair (vectorized) and keep the first row of each key
  factor_a <- as.character(cor_df$Factor1)
  factor_b <- as.character(cor_df$Factor2)
  cor_df <- cor_df %>%
    mutate(pair = paste(pmin(factor_a, factor_b),
                        pmax(factor_a, factor_b), sep = "-")) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)

  # Create graph (edges follow the row order of cor_df)
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- ggraph::create_layout(graph, layout = "kk")

  # Prepare edge label positions: midpoint between the two end nodes.
  # as_edgelist() replaces the deprecated get.edgelist().
  edge_df <- as.data.frame(as_edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation

  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)

  # Add buffer to layout limits to prevent cut-off
  xlim_range <- range(layout$x) + c(-0.2, 0.2)
  ylim_range <- range(layout$y) + c(-0.2, 0.2)

  # Final plot. Edge colour encodes |r| (>= 0.5, >= 0.3, weaker); the label
  # always shows the unclamped, signed correlation.
  p <- ggraph(layout, expand = TRUE) +
    geom_edge_link(aes(width = pmin(abs(Correlation), width_cap),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "grey"))),
                   alpha = 0.8) +

    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 7, color = "black",
                    segment.color = NA, max.overlaps = Inf) +

    geom_node_point(size = 17, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 5) +

    xlim(xlim_range) +
    ylim(ylim_range) +

    # (The former scale_linetype_manual() was dropped: no layer maps linetype.)
    scale_edge_width(limits = c(0, width_cap), range = c(0.1, 4)) +
    scale_edge_color_identity() +

    labs(title = dataset_name) +
    theme_void() +
    theme(
      plot.title = element_text(size = 24, margin = margin(b = 15)),
      plot.margin = margin(t = 40, r = 40, b = 40, l = 40),
      legend.position = "none"
    )

  p
}

# Node colour per plot (currently the same for every dataset)
node_colors <- rep(c("black"), length.out = length(cor_df_list))

# Generate one network plot per 4-factor dataset. Titles come from
# names(irt.matrices), the vector cor_df_list was built from, so a title can
# never be paired with the wrong table (a hand-written name vector relies on
# matching the order produced by split()).
network_plots_from_df <- mapply(
  FUN = make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(irt.matrices),
  color = node_colors,
  SIMPLIFY = FALSE
)

# -----------------------
# 5. Combine Network Plots
# -----------------------

# Three-column grid; `&` applies the margin theme to every panel
network_grid_from_df <- (
  wrap_plots(network_plots_from_df, ncol = 3) + plot_layout()
) & theme(plot.margin = margin(t = 5, b = 20))

# -----------------------
# 6. (Optional) Save Outputs
# -----------------------

# Write the grid to disk.
# NOTE: a later ggsave() call in this script writes to the same file name.
ggsave(
  filename = "network_plots_grid_from_df.png",
  plot = network_grid_from_df,
  width = 14,
  height = 14,
  dpi = 1200
)

# Names of all twelve VAAs (4-factor group first, then 3-factor group)
datasets <- c(
  "Brussels – French",
  "Federal – Dutch", "Federal – French",
  "General – Dutch", "General – French",
  "Brussels – Dutch",
  "EU – Dutch", "EU – French",
  "Flanders", "Wallonia",
  "Youth – Dutch", "Youth – French"
)

#irt.inter3.df$Correlation <- abs(irt.inter3.df$Correlation)  # Apply abs() if needed

# One data frame per VAA of the 3-factor group
irt.inter3.df.list <- split(irt.inter3.df, irt.inter3.df[["vaa"]])

# Plain-list copy of the split result
irt.list.3f <- as.list(irt.inter3.df.list)

# Rebuild matrices for 3-factor sets: one symmetric correlation matrix per
# VAA in irt.list.3f (long tables with Factor1, Factor2, Correlation).
irt.matrices.3f <- lapply(irt.list.3f, function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()

  # Index by label: a factor used as a matrix subscript would be treated as
  # its integer codes rather than its names
  from <- as.character(df$Factor1)
  to <- as.character(df$Factor2)

  factors_all <- sort(unique(c(from, to)))

  # Pairs that never occur stay NA
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))

  # seq_len() makes a zero-row table a no-op (1:nrow(df) would iterate over
  # c(1, 0) and fail)
  for (i in seq_len(nrow(df))) {
    val <- df$Correlation[i]
    mat[from[i], to[i]] <- val
    mat[to[i], from[i]] <- val
  }

  diag(mat) <- 1
  mat
})

# 4-factor matrices followed by 3-factor matrices, in one named list
irt.matrices.full <- c(irt.matrices, irt.matrices.3f)

# Rebuild the long-format tables from the full matrix list
# (unname() keeps the list unnamed, as lapply() over names() would)
cor_df_list <- unname(
  Map(cor_matrix_to_df, irt.matrices.full, names(irt.matrices.full))
)

# One network plot per matrix, titled with the matrix's list name
network_plots_from_df <- Map(
  make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(irt.matrices.full),
  color = rep_len("black", length(cor_df_list))
)

# Three-column grid of all plots; `&` applies the margin theme to every panel
network_grid_from_df <- (
  wrap_plots(network_plots_from_df, ncol = 3) + plot_layout()
) & theme(plot.margin = margin(t = 5, b = 20))

# Write the combined grid to disk
ggsave(
  filename = "network_plots_grid_from_df.png",
  plot = network_grid_from_df,
  width = 14,
  height = 14,
  dpi = 1200
)


##############################


library(igraph)
library(ggraph)
library(ggplot2)
library(ggrepel)
library(patchwork)
library(dplyr)
library(tibble)
library(purrr)

# -----------------------------------------------
# Convert both 4- and 3-factor sets into matrices
# -----------------------------------------------

# Convert a long table of pairwise correlations into a symmetric matrix.
#
# Args:
#   df: data frame with columns Factor1, Factor2 (factor names) and
#       Correlation (numeric).
#
# Returns: square matrix whose row and column names are the sorted factor
#   names; the diagonal is 1, listed pairs hold their signed correlation in
#   both triangles, and unlisted pairs are NA. A zero-row `df` gives a 0 x 0
#   matrix.
convert_to_matrix <- function(df) {
  df <- unique(df[, c("Factor1", "Factor2", "Correlation")])

  # Index by label: a factor used as a matrix subscript would be treated as
  # its integer codes rather than its names
  from <- as.character(df$Factor1)
  to <- as.character(df$Factor2)

  factors_all <- sort(unique(c(from, to)))
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))

  # seq_len() makes a zero-row table a no-op (1:nrow(df) would iterate over
  # c(1, 0) and fail)
  for (i in seq_len(nrow(df))) {
    val <- df$Correlation[i]  # signed value; abs() is applied downstream
    mat[from[i], to[i]] <- val
    mat[to[i], from[i]] <- val
  }

  diag(mat) <- 1
  mat
}

# One matrix per VAA, built separately for the 4- and 3-factor groups
irt.matrices.4f <- irt.inter4.df %>%
  split(.$vaa) %>%
  lapply(convert_to_matrix)
irt.matrices.3f <- irt.inter3.df %>%
  split(.$vaa) %>%
  lapply(convert_to_matrix)

# 4-factor matrices first, then 3-factor matrices
irt.matrices.full <- append(irt.matrices.4f, irt.matrices.3f)

# -----------------------------------------------
# Average correlations per plot and overall
# -----------------------------------------------

# Mean absolute value of the cells below the diagonal of `mat`,
# ignoring NA cells.
compute_avg_correlation <- function(mat) {
  below_diag <- mat[lower.tri(mat)]
  mean(abs(below_diag), na.rm = TRUE)
}

# Mean absolute correlation per dataset. vapply() guarantees a named numeric
# vector whatever the input (sapply() returns list() for an empty list).
cor_avg_per_plot <- vapply(irt.matrices.full, compute_avg_correlation,
                           numeric(1))

# Unweighted mean of the per-dataset averages, rounded to three decimals
overall_avg_correlation <- round(mean(cor_avg_per_plot), 3)

# -----------------------------------------------
# Convert matrices to data frames
# -----------------------------------------------

# Reshape a correlation matrix into a long data frame with columns
# Factor1, Factor2, Correlation and Dataset (= dataset_name); rows where both
# factors are the same (the diagonal) are removed.
cor_matrix_to_df <- function(cor_mat, dataset_name) {
  pairs_long <- as.data.frame(as.table(cor_mat))
  pairs_long$Dataset <- dataset_name
  names(pairs_long) <- c("Factor1", "Factor2", "Correlation", "Dataset")

  is_self_pair <- pairs_long$Factor1 == pairs_long$Factor2
  pairs_long[!is_self_pair, ]
}

# Long-format table for every matrix, tagged with the matrix's list name
# (unname() keeps the list unnamed, as lapply() over names() would)
cor_df_list <- unname(
  Map(cor_matrix_to_df, irt.matrices.full, names(irt.matrices.full))
)

# -----------------------------------------------
# Create network plots
# -----------------------------------------------

# Draw the correlation network of one dataset, annotated with its mean
# absolute correlation.
#
# Args:
#   cor_df:       long data frame with columns Factor1, Factor2, Correlation
#                 (may contain both directions of every pair).
#   dataset_name: string used as the plot title.
#   color:        colour of the factor nodes.
#   avg_corr:     number shown (rounded to 2 decimals) in the bottom-right
#                 annotation.
#
# Returns: a ggraph plot with one node per factor and one labelled edge per
#   factor pair.
make_graph_plot_from_df <- function(cor_df, dataset_name, color, avg_corr) {
  # Upper limit of the edge-width scale. Widths are clamped to it below:
  # values outside a continuous scale's limits are censored to NA, which
  # would leave the strongest correlations without a visible edge.
  width_cap <- 0.6

  # Keep one row per unordered factor pair: build an order-independent key
  # (vectorized) and keep the first row of each key
  factor_a <- as.character(cor_df$Factor1)
  factor_b <- as.character(cor_df$Factor2)
  cor_df <- cor_df %>%
    mutate(pair = paste(pmin(factor_a, factor_b),
                        pmax(factor_a, factor_b), sep = "-")) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)

  # Edges follow the row order of cor_df
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- create_layout(graph, layout = "kk")

  # Edge label positions: midpoint between the two end nodes.
  # as_edgelist() replaces the deprecated get.edgelist().
  edge_df <- as.data.frame(as_edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation

  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)

  # Add more space around the plot area (extra room at the bottom for the
  # annotation)
  xlim_range <- range(layout$x) + c(-0.3, 0.3)
  ylim_range <- range(layout$y) + c(-0.5, 0.3)

  # Edge colour encodes |r| (>= 0.5, >= 0.3, weaker); the label always shows
  # the unclamped, signed correlation.
  p <- ggraph(layout) +
    geom_edge_link(aes(width = pmin(abs(Correlation), width_cap),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "gray"))),
                   alpha = 0.5) +
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 6, color = "black", segment.color = NA, max.overlaps = Inf) +
    geom_node_point(size = 16, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 6) +

    # Annotation in the bottom-right corner, inset from the plot limits
    annotate("text",
             x = xlim_range[2] - 0.1,
             y = ylim_range[1] + 0.2,
             label = paste0("Mean abs. corr. = ", round(avg_corr, 2)),
             hjust = 1, vjust = 1., size = 7, fontface = "italic") +

    xlim(xlim_range) +
    ylim(ylim_range) +

    scale_edge_width(limits = c(0, width_cap), range = c(0.5, 5)) +
    scale_edge_color_identity() +

    theme_void() +
    theme(
      plot.title = element_text(size = 22, face = "bold", margin = margin(b = 0), hjust = 0.5),
      #plot.margin = margin(t = 15, r = 15, b = 15, l = 15),
      legend.position = "none"
    ) +

    labs(title = dataset_name)

  p
}

# Node colour per plot (the same for every dataset)
node_colors <- rep_len("black", length(irt.matrices.full))

# One annotated network plot per matrix; titles and averages are both looked
# up by the matrix's list name
network_plots <- Map(
  make_graph_plot_from_df,
  cor_df = cor_df_list,
  dataset_name = names(irt.matrices.full),
  color = node_colors,
  avg_corr = cor_avg_per_plot[names(irt.matrices.full)]
)

# -----------------------------------------------
# Combine plots
# -----------------------------------------------

# Three-column grid of all network plots, with the overall average shown as a
# centred caption below the grid.
# A first draft of this assignment was removed. It was written as
# `... & theme(...) + plot_annotation(...)`, and since `+` binds tighter than
# `&` the annotation was added to the theme object instead of the patchwork.
# Its result was overwritten by this assignment anyway.
network_grid <- wrap_plots(network_plots, ncol = 3) +
  plot_layout(guides = "collect") +
  plot_annotation(
    caption = paste0("Overall average inter-factor correlation: ", sprintf("%.2f", overall_avg_correlation)),
    theme = theme(
      plot.caption = element_text(hjust = 0.5, size = 24, margin = margin(t = 20)),
      plot.margin = margin(b = 10, t = 0, r = 0, l = 0)  # Add extra bottom margin
    )
  )

# -----------------------------------------------
# Save output
# -----------------------------------------------

# Write the annotated grid to disk
ggsave(
  filename = "network_plots_grid_with_all_avg_corr.png",
  plot = network_grid,
  width = 16,
  height = 16,
  dpi = 1200
)
